{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导入依赖并指定json文件路径"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import json\n",
    "from tqdm import tqdm\n",
    "# Empty placeholder for the table; the JSON-to-DataFrame cell below fills `df`.\n",
    "df = pd.DataFrame()\n",
    "# Input file; it is read line by line below, one JSON document per line.\n",
    "data_path = 'QAMedicalKG/data/medical3.json'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## json转dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "21it [00:00, 194.84it/s]\n"
     ]
    }
   ],
   "source": [
    "# Keys expected to hold lists in each record; each list is flattened to one\n",
    "# comma-separated string so the value fits in a single table cell.\n",
    "p_feature = ['category', 'cure_department', 'symptom','check', 'acompany','cure_way','recommand_drug' , 'common_drug', 'drug_detail', 'do_eat','not_eat', 'recommand_eat']\n",
    "\n",
    "# Collect the records in a list and build the frame once: growing a DataFrame\n",
    "# row by row copies it on every step, and DataFrame.append was removed in pandas 2.0.\n",
    "records = []\n",
    "# Context manager so the file handle is closed once reading finishes.\n",
    "with open(data_path, encoding='utf8') as json_lines:\n",
    "    for line in tqdm(json_lines):\n",
    "        if not line.strip():\n",
    "            continue  # skip blank lines instead of failing in json.loads\n",
    "        record = json.loads(line)\n",
    "        for key in p_feature:\n",
    "            value = record.get(key)\n",
    "            # Join real sequences only: a missing key stays missing, and a plain\n",
    "            # string is kept whole rather than being split into characters.\n",
    "            if isinstance(value, (list, tuple)):\n",
    "                record[key] = ','.join(str(item) for item in value)\n",
    "        records.append(record)\n",
    "\n",
    "# Rebinding df (rather than appending to an existing frame) keeps this cell\n",
    "# safe to re-run: the result depends only on the input file.\n",
    "df = pd.DataFrame(records)\n",
    "# errors='ignore' keeps the cell from failing when no record carries '_id'.\n",
    "df = df.drop(columns='_id', errors='ignore')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 调整列名和顺序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source column keys, in the order they should appear in the exported table.\n",
    "orders = ['name', 'desc', 'category', 'cure_department', 'cause', 'symptom', 'check', 'acompany', 'cost_money',\n",
    "       'cure_lasttime', 'cure_way', 'cured_prob', 'easy_get',\n",
    "        'get_prob', 'get_way', 'prevent',\n",
    "       'recommand_drug' , 'common_drug', 'drug_detail', 'do_eat',\n",
    "       'not_eat', 'recommand_eat', 'yibao_status']\n",
    "# Chinese display names, paired with `orders` by position.\n",
    "orders_cn = ['疾病名称','疾病描述','疾病种类','科室','病因','症状','检查','并发症','花费','疗程','疗法','治愈率','易感人群','感染概率','感染途径','预防措施','推荐药物','常用药物','具体药物','可以吃','不可以吃','推荐吃','是否纳入医保']\n",
    "# zip() stops at the shorter list, so a length mismatch would silently drop names.\n",
    "assert len(orders) == len(orders_cn), 'orders and orders_cn must pair up one-to-one'\n",
    "column_names_cn = dict(zip(orders, orders_cn))\n",
    "\n",
    "# Rename first, then select by the new names. rename() skips keys that are not\n",
    "# present, so running this cell again on an already-renamed frame changes nothing.\n",
    "df = df.rename(columns=column_names_cn)\n",
    "missing = [key for key, name_cn in column_names_cn.items() if name_cn not in df.columns]\n",
    "if missing:\n",
    "    # Same exception type that selecting a missing column would raise.\n",
    "    raise KeyError(f'columns missing from df: {missing}')\n",
    "df = df[orders_cn]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>疾病名称</th>\n",
       "      <th>疾病描述</th>\n",
       "      <th>疾病种类</th>\n",
       "      <th>科室</th>\n",
       "      <th>病因</th>\n",
       "      <th>症状</th>\n",
       "      <th>检查</th>\n",
       "      <th>并发症</th>\n",
       "      <th>花费</th>\n",
       "      <th>疗程</th>\n",
       "      <th>...</th>\n",
       "      <th>感染概率</th>\n",
       "      <th>感染途径</th>\n",
       "      <th>预防措施</th>\n",
       "      <th>推荐药物</th>\n",
       "      <th>常用药物</th>\n",
       "      <th>具体药物</th>\n",
       "      <th>可以吃</th>\n",
       "      <th>不可以吃</th>\n",
       "      <th>推荐吃</th>\n",
       "      <th>是否纳入医保</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>肺泡蛋白质沉积症</td>\n",
       "      <td>肺泡蛋白质沉积症(简称PAP)，又称Rosen-Castle-man-Liebow综合征，是...</td>\n",
       "      <td>疾病百科,内科,呼吸内科</td>\n",
       "      <td>内科,呼吸内科</td>\n",
       "      <td>病因未明，推测与几方面因素有关：如大量粉尘吸入（铝，二氧化硅等），机体免疫功能下降（尤其婴幼...</td>\n",
       "      <td>紫绀,胸痛,呼吸困难,乏力,毓卓</td>\n",
       "      <td>胸部CT检查,肺活检,支气管镜检查</td>\n",
       "      <td>多重肺部感染</td>\n",
       "      <td>根据不同医院，收费标准不一致，省市三甲医院约（ 8000——15000 元）</td>\n",
       "      <td>约3个月</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00002%</td>\n",
       "      <td>无传染性</td>\n",
       "      <td>1、避免感染分支杆菌病，卡氏肺囊肿肺炎，巨细胞病毒等。\\n2、注意锻炼身体，提高免疫力。</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>百日咳</td>\n",
       "      <td>百日咳(pertussis，whoopingcough)是由百日咳杆菌所致的急性呼吸道传染病...</td>\n",
       "      <td>疾病百科,儿科,小儿内科</td>\n",
       "      <td>儿科,小儿内科</td>\n",
       "      <td>(一)发病原因\\n病原菌是鲍特菌属(Bordetella)中的百日咳鲍特菌(B.pertus...</td>\n",
       "      <td>吸气时有蝉鸣音,痉挛性咳嗽,胸闷,肺阴虚,抽搐,低热,闫鹏辉,惊厥</td>\n",
       "      <td>耳、鼻、咽拭子细菌培养,周围血白细胞计数及分类检验,血常规,酶联免疫吸附试验,白细胞分类计数</td>\n",
       "      <td>肺不张</td>\n",
       "      <td>根据不同医院，收费标准不一致，市三甲医院约（1000-4000元）</td>\n",
       "      <td>1-2个月</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5%</td>\n",
       "      <td>呼吸道传播</td>\n",
       "      <td>1、控制传染源：在流行季节，若有前驱症状应及早抗生素治疗。\\n2、切断传播途径：由于百日咳杆...</td>\n",
       "      <td>琥乙红霉素片,琥乙红霉素颗粒,百咳静糖浆,穿心莲内酯片,红霉素肠溶片,环酯红霉素片</td>\n",
       "      <td>穿心莲内酯片,百咳静糖浆</td>\n",
       "      <td>惠普森穿心莲内酯片(穿心莲内酯片),北京同仁堂百咳静糖浆(百咳静糖浆),邦琪药业百咳静糖浆(...</td>\n",
       "      <td>南瓜子仁,圆白菜,樱桃番茄,小白菜</td>\n",
       "      <td>螃蟹,海蟹,海虾,海螺</td>\n",
       "      <td>清蒸鸡蛋羹,百合双耳鸡蛋羹,排骨汤,罗汉果雪耳鸡汤,小黄瓜凉拌面,黄瓜三丝汤,黄瓜拌兔丝,黄...</td>\n",
       "      <td>否</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 23 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       疾病名称                                               疾病描述          疾病种类  \\\n",
       "0  肺泡蛋白质沉积症  肺泡蛋白质沉积症(简称PAP)，又称Rosen-Castle-man-Liebow综合征，是...  疾病百科,内科,呼吸内科   \n",
       "1       百日咳  百日咳(pertussis，whoopingcough)是由百日咳杆菌所致的急性呼吸道传染病...  疾病百科,儿科,小儿内科   \n",
       "\n",
       "        科室                                                 病因  \\\n",
       "0  内科,呼吸内科  病因未明，推测与几方面因素有关：如大量粉尘吸入（铝，二氧化硅等），机体免疫功能下降（尤其婴幼...   \n",
       "1  儿科,小儿内科  (一)发病原因\\n病原菌是鲍特菌属(Bordetella)中的百日咳鲍特菌(B.pertus...   \n",
       "\n",
       "                                  症状  \\\n",
       "0                   紫绀,胸痛,呼吸困难,乏力,毓卓   \n",
       "1  吸气时有蝉鸣音,痉挛性咳嗽,胸闷,肺阴虚,抽搐,低热,闫鹏辉,惊厥   \n",
       "\n",
       "                                               检查     并发症  \\\n",
       "0                               胸部CT检查,肺活检,支气管镜检查  多重肺部感染   \n",
       "1  耳、鼻、咽拭子细菌培养,周围血白细胞计数及分类检验,血常规,酶联免疫吸附试验,白细胞分类计数     肺不张   \n",
       "\n",
       "                                       花费     疗程  ...      感染概率   感染途径  \\\n",
       "0  根据不同医院，收费标准不一致，省市三甲医院约（ 8000——15000 元）   约3个月  ...  0.00002%   无传染性   \n",
       "1       根据不同医院，收费标准不一致，市三甲医院约（1000-4000元）  1-2个月  ...      0.5%  呼吸道传播   \n",
       "\n",
       "                                                预防措施  \\\n",
       "0       1、避免感染分支杆菌病，卡氏肺囊肿肺炎，巨细胞病毒等。\\n2、注意锻炼身体，提高免疫力。   \n",
       "1  1、控制传染源：在流行季节，若有前驱症状应及早抗生素治疗。\\n2、切断传播途径：由于百日咳杆...   \n",
       "\n",
       "                                        推荐药物          常用药物  \\\n",
       "0                                                      NaN   \n",
       "1  琥乙红霉素片,琥乙红霉素颗粒,百咳静糖浆,穿心莲内酯片,红霉素肠溶片,环酯红霉素片  穿心莲内酯片,百咳静糖浆   \n",
       "\n",
       "                                                具体药物                可以吃  \\\n",
       "0                                                                   NaN   \n",
       "1  惠普森穿心莲内酯片(穿心莲内酯片),北京同仁堂百咳静糖浆(百咳静糖浆),邦琪药业百咳静糖浆(...  南瓜子仁,圆白菜,樱桃番茄,小白菜   \n",
       "\n",
       "          不可以吃                                                推荐吃 是否纳入医保  \n",
       "0          NaN                                                NaN      否  \n",
       "1  螃蟹,海蟹,海虾,海螺  清蒸鸡蛋羹,百合双耳鸡蛋羹,排骨汤,罗汉果雪耳鸡汤,小黄瓜凉拌面,黄瓜三丝汤,黄瓜拌兔丝,黄...      否  \n",
       "\n",
       "[2 rows x 23 columns]"
      ]
     },
     "execution_count": 183,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "疾病名称                                                    百日咳\n",
       "疾病描述      百日咳(pertussis，whoopingcough)是由百日咳杆菌所致的急性呼吸道传染病...\n",
       "疾病种类                                           疾病百科,儿科,小儿内科\n",
       "科室                                                  儿科,小儿内科\n",
       "病因        (一)发病原因\\n病原菌是鲍特菌属(Bordetella)中的百日咳鲍特菌(B.pertus...\n",
       "症状                        吸气时有蝉鸣音,痉挛性咳嗽,胸闷,肺阴虚,抽搐,低热,闫鹏辉,惊厥\n",
       "检查           耳、鼻、咽拭子细菌培养,周围血白细胞计数及分类检验,血常规,酶联免疫吸附试验,白细胞分类计数\n",
       "并发症                                                     肺不张\n",
       "花费                        根据不同医院，收费标准不一致，市三甲医院约（1000-4000元）\n",
       "疗程                                                    1-2个月\n",
       "疗法                                               药物治疗,支持性治疗\n",
       "治愈率                                                     98%\n",
       "易感人群                                                  多见于小儿\n",
       "感染概率                                                   0.5%\n",
       "感染途径                                                  呼吸道传播\n",
       "预防措施      1、控制传染源：在流行季节，若有前驱症状应及早抗生素治疗。\\n2、切断传播途径：由于百日咳杆...\n",
       "推荐药物              琥乙红霉素片,琥乙红霉素颗粒,百咳静糖浆,穿心莲内酯片,红霉素肠溶片,环酯红霉素片\n",
       "常用药物                                           穿心莲内酯片,百咳静糖浆\n",
       "具体药物      惠普森穿心莲内酯片(穿心莲内酯片),北京同仁堂百咳静糖浆(百咳静糖浆),邦琪药业百咳静糖浆(...\n",
       "可以吃                                       南瓜子仁,圆白菜,樱桃番茄,小白菜\n",
       "不可以吃                                            螃蟹,海蟹,海虾,海螺\n",
       "推荐吃       清蒸鸡蛋羹,百合双耳鸡蛋羹,排骨汤,罗汉果雪耳鸡汤,小黄瓜凉拌面,黄瓜三丝汤,黄瓜拌兔丝,黄...\n",
       "是否纳入医保                                                    否\n",
       "Name: 1, dtype: object"
      ]
     },
     "execution_count": 184,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导出csv文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the table to medical_data.csv (relative path); index=False omits the row index.\n",
    "df.to_csv('medical_data.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
